In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
# import seaborn as sns 
import datetime
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Scatter, Figure, Layout
import plotly
import plotly.graph_objs as go
import plotly.express as px
from IPython.display import Markdown as md
init_notebook_mode(connected=False)
import io
import requests
import re

COVID-19 in Italy. Visuals


(alternatively, see results and code together here)

 


Data source: this GitHub page

Authors and sources mentioned: Editore/Autore del dataset: Dipartimento della Protezione Civile. Categoria ISO 19115: Salute. Dati forniti dal Ministero della Salute.

Regional data files (Dati per Regione):
  • Struttura file giornaliero: dpc-covid19-ita-regioni-yyyymmdd.csv (dpc-covid19-ita-regioni-20200224.csv)
  • File complessivo: dpc-covid19-ita-regioni.csv
  • File ultimi dati (latest): dpc-covid19-ita-regioni-latest.csv

 

In [2]:
# Scrape the Italian Wikipedia article on regions; one of its tables carries the
# population figures used to express counts per million inhabitants.
URL='https://it.wikipedia.org/wiki/Regione_(Italia)'
res=requests.get(URL, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as if it were the article.
res.raise_for_status()
# Wrap the markup in a buffer: passing literal HTML to read_html is deprecated in recent pandas.
tables=pd.read_html(io.StringIO(res.text))
# NOTE(review): the positional index 13 depends on the page layout at scraping time.
# The following cell expects this table to have 'Regione' and 'Popolazione (ab.)' columns;
# confirm the index whenever the article is edited.
dt = tables[13]
In [3]:
def dewhite(x):
    """Return only the digits of ``x``, joined into a single string.

    Everything that is not a digit (spaces, thousands separators, footnote
    marks, ...) is dropped, e.g. ``'4 905 854'`` becomes ``'4905854'``.

    Parameters
    ----------
    x : str
        Text containing a number with arbitrary separators.

    Returns
    -------
    str
        The digits of ``x`` in their original order; ``''`` if there are none.
    """
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    return ''.join(re.findall(r'\d+', x))

# Keep only the region name and its population, under English column names.
dt2 = dt[['Regione','Popolazione (ab.)']].copy()
dt2.columns = ['Region','Pop']

# Population figures are scraped as text; remove every non-digit character
# (spaces, thousands separators) before casting to int.
# The vectorised .str accessor with a raw-string pattern replaces the per-row lambda,
# whose '\d' literal was an invalid escape sequence.
dt2['Pop'] = dt2['Pop'].str.replace(r'\D+', '', regex=True).astype(int)
In [22]:
# Download the cumulative per-region CSV from the pcm-dpc/COVID-19 repository.
response = requests.get("https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv", timeout=30)
# Stop here on an HTTP error; otherwise the error body would be parsed as CSV.
response.raise_for_status()
s = response.content
dat = pd.read_csv(io.StringIO(s.decode('utf-8')))
# 'data' is read as text (no date parsing is requested), so max() is a string maximum.
# For ISO-8601 stamps such as 2020-10-15T17:00:00 that is also the latest report.
cdate = dat.data.max()

md("Currently data as of date: {}".format(cdate))
Out[22]:

Currently data as of date: 2020-10-15T17:00:00


 

What's in the original dataframe?

In [23]:
# Render the raw column names as a Markdown line.
md("All column names: {}".format(list(dat.columns)))
Out[23]:

All column names: ['data', 'stato', 'codice_regione', 'denominazione_regione', 'lat', 'long', 'ricoverati_con_sintomi', 'terapia_intensiva', 'totale_ospedalizzati', 'isolamento_domiciliare', 'totale_positivi', 'variazione_totale_positivi', 'nuovi_positivi', 'dimessi_guariti', 'deceduti', 'casi_da_sospetto_diagnostico', 'casi_da_screening', 'totale_casi', 'tamponi', 'casi_testati', 'note']

 

Last 5 rows of the raw data (five regions on the latest reporting date)

In [24]:
# Italian source column -> English name used in the rest of the notebook.
# Renaming by name (instead of assigning a positional list of names) keeps working
# when the upstream CSV gains or reorders columns; unmapped columns keep their name.
COLUMN_NAMES = {
    'data': 'Date',
    'denominazione_regione': 'Region',
    'lat': 'Lat',
    'long': 'Long',
    'ricoverati_con_sintomi': 'HospWithSymptoms',
    'terapia_intensiva': 'IC',
    'totale_ospedalizzati': 'HospTotal',
    'isolamento_domiciliare': 'AtHome',
    'totale_positivi': 'CurrentlyPositive',
    'variazione_totale_positivi': 'VariationOfPositives',
    'nuovi_positivi': 'NewPositives',
    'dimessi_guariti': 'Recovered',
    'deceduti': 'Deaths',
    'casi_da_sospetto_diagnostico': 'Diagnostico',
    'casi_da_screening': 'Screening',
    'totale_casi': 'TotalCases',
    'tamponi': 'NoOfTests',
}

df = dat.drop(['stato','codice_regione'], axis=1).rename(columns=COLUMN_NAMES)

# Attach each region's population.
# NOTE(review): this is an inner join on the region name, so any region spelled
# differently in the two sources is dropped without warning -- verify the region count.
df = pd.merge(df, dt2, on='Region')

# Keep the calendar date only (the source stamps carry a time of day).
df['Date'] = pd.to_datetime(df['Date']).dt.date
# Fix the row order explicitly (region, then date) so it does not depend on how the
# installed pandas version orders merge results. Done before 'Date' also becomes the
# index name, which would make sorting by that label ambiguous.
df = df.sort_values(['Region', 'Date'])

# 'Date' is kept both as a column (date objects) and as a DatetimeIndex.
df = df.set_index(df["Date"])
df.index = pd.to_datetime(df.index)

# NOTE(review): negative daily counts are flipped to positive here; if they are
# downward corrections, clipping at zero may be the intended treatment -- confirm.
df['NewPositives'] = np.abs(df['NewPositives'])

dat.tail(5)
Out[24]:
data stato codice_regione denominazione_regione lat long ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare ... variazione_totale_positivi nuovi_positivi dimessi_guariti deceduti casi_da_sospetto_diagnostico casi_da_screening totale_casi tamponi casi_testati note
4930 2020-10-15T17:00:00 ITA 19 Sicilia 38.115697 13.362357 468 52 520 4967 ... 300 399 4854 350 6856.0 3835.0 10691 575631 411784.0 NaN
4931 2020-10-15T17:00:00 ITA 9 Toscana 43.769231 11.255889 276 46 322 7584 ... 479 581 11171 1185 15463.0 4799.0 20262 878817 591385.0 NaN
4932 2020-10-15T17:00:00 ITA 10 Umbria 43.106758 12.388247 74 12 86 1731 ... 194 263 2104 91 1903.0 2109.0 4012 243404 144075.0 NaN
4933 2020-10-15T17:00:00 ITA 2 Valle d'Aosta 45.737503 7.320149 14 3 17 343 ... 65 67 1127 146 1472.0 161.0 1633 32441 22227.0 NaN
4934 2020-10-15T17:00:00 ITA 5 Veneto 45.434905 12.338452 279 40 319 7289 ... 426 600 23728 2237 22546.0 11027.0 33573 2114126 825788.0 NaN

5 rows × 21 columns


 

Variable names translated into English, with explanations

  • HospWithSymptoms : Currently hospitalized patients with symptoms
  • IC : Intensive care
  • HospTotal: Total number of currently hospitalized patients
  • AtHome : Currently at home confinement
  • CurrentlyPositive : Total amount of current positive cases (Hospitalised patients + Home confinement)
  • NewPositives : Daily new positive cases (total amount of positive cases of the current day - total amount of positive cases of the previous day)
  • TotalCases : Total amount of positive cases
  • NoOfTests : Tests performed
In [25]:
# Display the final five rows of the renamed, population-enriched frame.
df.tail(n=5)
Out[25]:
Date Region Lat Long HospWithSymptoms IC HospTotal AtHome CurrentlyPositive VariationOfPositives NewPositives Recovered Deaths Diagnostico Screening TotalCases NoOfTests casi_testati note Pop
Date
2020-10-11 2020-10-11 Veneto 45.434905 12.338452 238 24 262 5826 6088 256 438 23197 2218 22196.0 9307.0 31503 2063607 806051.0 NaN 4905854
2020-10-12 2020-10-12 Veneto 45.434905 12.338452 244 29 273 6016 6289 201 328 23323 2219 22250.0 9581.0 31831 2069955 808554.0 NaN 4905854
2020-10-13 2020-10-13 Veneto 45.434905 12.338452 272 33 305 6350 6655 366 485 23435 2226 22334.0 9982.0 32316 2074007 810366.0 NaN 4905854
2020-10-14 2020-10-14 Veneto 45.434905 12.338452 271 35 306 6876 7182 527 657 23565 2226 22436.0 10537.0 32973 2095102 818552.0 NaN 4905854
2020-10-15 2020-10-15 Veneto 45.434905 12.338452 279 40 319 7289 7608 426 600 23728 2237 22546.0 11027.0 33573 2114126 825788.0 NaN 4905854

 

(Click a legend entry to hide or show a region; double-click it to isolate that region in the graph.)

In [26]:
# Work on a copy: the following cells add derived columns to df2, and with a plain
# alias those writes would also change `df`, which has already been displayed above.
df2 = df.copy()

# One line per region; the DatetimeIndex supplies the x values.
fig = px.line(df2, x=df2.index, y="NewPositives", color="Region", hover_name="Region",
        render_mode="svg", log_y=False)
fig.update_layout(title="Daily new cases, absolute numbers")
fig.show()
In [27]:
# 7-day moving average of daily new cases, computed separately for each region.
# The frame stacks all regions, so a plain .rolling() over the whole column would
# average values belonging to different regions into the same window.
df2['MovAv7'] = (
    df2.groupby('Region')['NewPositives']
       .transform(lambda s: s.rolling(window=7).mean())
)

# Build the date filter once and use it for both the rows and the x values.
after_start = df2.index > '2020-3-1'
fig = px.line(df2[after_start], x=df2.index[after_start], y="MovAv7", color="Region", hover_name="Region",
        render_mode="svg", log_y=False)
fig.update_layout(title="1-week MA of daily new cases")
fig.show()
In [28]:
# Daily new cases per one million inhabitants of the region.
df2['NewPos_pc'] = df2['NewPositives']/df2['Pop']*1000_000

# Smooth with a 7-day moving average inside each region; rolling over the whole
# stacked frame would blend values of different regions in one window.
df2['NewPos_pc'] = (
    df2.groupby('Region')['NewPos_pc']
       .transform(lambda s: s.rolling(window=7).mean())
)

# Build the date filter once and use it for both the rows and the x values.
after_start = df2.index > '2020-3-1'
fig = px.line(df2[after_start], x=df2.index[after_start], y="NewPos_pc", color="Region",
              hover_name="Region", log_y=False)
fig.update_layout(title="1-week MA of daily new cases, per million")
fig.show()
In [29]:
# Intensive-care patients scaled to one million inhabitants of the region.
df2['IC_pc'] = df2['IC'].div(df2['Pop']).mul(1_000_000)

fig = px.line(
    df2,
    x="Date",
    y="IC_pc",
    color="Region",
    hover_name="Region",
    log_y=False,
    render_mode="svg",
)
fig.update_layout(title="Current intensive care patients, per million")
fig.show()
In [30]:
# Hospitalised patients scaled to one million inhabitants of the region.
df2['Hosp_pc'] = df2['HospTotal'].div(df2['Pop']).mul(1_000_000)

fig = px.line(
    df2,
    x="Date",
    y="Hosp_pc",
    color="Region",
    hover_name="Region",
    log_y=False,
    render_mode="svg",
)
fig.update_layout(title="Current hospitalized, per million")
fig.show()
In [31]:
# Separate frame for the death series so df2 is left untouched.
df3 = df2.copy()

# Daily deaths = cumulative deaths minus the previous row's value within the same
# region; the first row of every region has no predecessor and stays NaN.
previous_deaths = df3.groupby(['Region'])['Deaths'].shift()
df3['NewDeaths'] = df3['Deaths'].sub(previous_deaths)

fig = px.bar(
    df3,
    x=df3['Date'],
    y="NewDeaths",
    color="Region",
    hover_name="Date",
)
fig.update_layout(title="Daily number of new deaths, absolute numbers")
fig.show()
In [32]:
# Tests per day: difference of the cumulative test count to the previous row
# of the same region.
df2['NewNoOfTests'] = df2['NoOfTests'] - df2.groupby(['Region'])['NoOfTests'].transform('shift')

# Share of new positives among the day's tests, in percent.
# A zero test increment would give an infinite ratio and a negative one a negative
# percentage; neither can be drawn on the log axis below, so both are treated as missing.
valid_tests = df2['NewNoOfTests'].where(df2['NewNoOfTests'] > 0)
df2['New_per_test'] = df2['NewPositives']/valid_tests*100

# Select the regions once and reuse the subset for both the data and the x values.
northern = df2[df2['Region'].isin(['Lombardia','Veneto','Emilia-Romagna','Piemonte','Liguria'])]
fig = px.line(northern,
              x=northern.index, y="New_per_test", color="Region", hover_name="Region",
        render_mode="svg", log_y=True, line_shape='spline')
fig.update_layout(title="New positive cases in daily tests in Northern regions, %")
fig.show()
In [33]:
# Cumulative deaths scaled to one million inhabitants of the region.
df2['Deaths_per_mio'] = df2['Deaths'].div(df2['Pop']).mul(1_000_000)

fig = px.line(
    df2,
    x="Date",
    y="Deaths_per_mio",
    color="Region",
    hover_name="Region",
    line_shape='spline',
    render_mode="svg",
)
fig.update_layout(title="Cumulative number of deaths, per million")
fig.show()
In [34]:
# Daily variation of currently positive cases per one million inhabitants.
df2['Change_per_mio'] = df2['VariationOfPositives']/df2['Pop']*1000_000
# 7-day moving average inside each region; rolling over the whole stacked frame
# would blend values of different regions in one window.
df2['Change_per_mio'] = (
    df2.groupby('Region')['Change_per_mio']
       .transform(lambda s: s.rolling(window=7).mean())
)

# Valle d'Aosta is left out of this chart (the title says so as well).
shown = df2[(df2.index>'2020-3-1') & (df2['Region']!="""Valle d'Aosta""")]
fig = px.line(shown, x='Date', y="Change_per_mio", color="Region", hover_name="Date")
# The plotted quantity is the daily change of current positives, not their level.
fig.update_layout(title="1-week MA of daily change in current positive cases, per million (excl. Valle d'Aosta)")
fig.show()

 

Italy as a whole

Data from all regions aggregated

In [35]:
df2 = df
# National totals per day. Only numeric columns are summed: the frame also holds
# text and date-object columns, and summing those is an error on pandas 2 and later
# (older versions dropped them silently).
df_sum = (
    df2.drop(['Lat','Long'], axis=1)
       .groupby(df2['Date'])
       .sum(numeric_only=True)
       .reset_index()
)

# Long format: one row per (date, variable) so each variable becomes one line.
df_sum2 = pd.melt(df_sum, id_vars=['Date'], value_vars=['NewPositives','IC','HospTotal'])

fig = px.line(df_sum2, x="Date", y="value", color='variable', hover_name="value", render_mode="svg", log_y=True, 
              line_shape='spline')
fig.update_layout(title="Number of daily new positive cases, current IC patients and total hospitalized")
fig.show()
In [ ]: